import collections
import csv
import pandas as pd
from pprint import pprint
from tqdm import tqdm
import tsm

# Rows of knight2_sci.csv that pass the retweet filter further down.
media = []

# Media table; the code below uses its media_id and partition columns.
# keep_default_na=False keeps empty cells as '' instead of NaN.
netp = pd.read_csv(
    'election_retweeter_polarization_media_scores.csv',
    keep_default_na=False,
)
# Lookup table pairing each media_id with its twitter_checked value.
twitter = pd.read_csv('netp_twitter.csv', keep_default_na=False)
twitter_dict = dict(
    zip(twitter['media_id'].tolist(), twitter['twitter_checked'].tolist())
)

# Attach the lower-cased handle to every row of netp; a media_id that is
# absent from netp_twitter.csv raises KeyError here.
netp['twitter_checked'] = netp['media_id'].apply(lambda x: twitter_dict[x])
netp['twitter_checked'] = netp['twitter_checked'].str.lower()

handles = set(netp['twitter_checked'])
# Retweet prefix "rt @<handle>:" for every handle except '-'
# (presumably the placeholder for "no account" -- confirm against the data).
handles_rt = {"rt @" + h + ":" for h in handles if h != '-'}
# handle -> partition id
media_dict = dict(
    zip(netp['twitter_checked'].tolist(), netp['partition'].tolist())
)

# Keep every two-column row whose second field, lower-cased, contains the
# retweet prefix of at least one tracked handle.
# newline='' hands newline handling to the csv module (as its documentation
# requires), so line breaks embedded in quoted fields are parsed correctly.
with open('knight2_sci.csv', 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    for row in tqdm(reader):
        if len(row) != 2:
            continue
        # Lower-case once per row rather than once per handle.
        text = row[1].lower()
        if any(h in text for h in handles_rt):
            media.append(row)
            
# One five-slot retweet counter per lower-cased first column of the kept rows
# (presumably the tweeting user's name -- confirm against the CSV layout).
aud_dict = {}
for row in media:
    aud_dict[row[0].lower()] = [0, 0, 0, 0, 0]

media_edges = tsm.t2e(media, 'RTS_ONLY')

# Tally each edge whose first element is a known user and whose second
# element is a tracked handle.
for edge in media_edges:
    if edge[0] not in aud_dict or edge[1] not in media_dict:
        continue
    # Partition ids are shifted down by one to get the counter slot.
    aud_dict[edge[0]][media_dict[edge[1]] - 1] += 1

# Minimum number of retweets (summed over all partitions) a user must have
# sent to be included in any partition's audience.
min_rts = 3

# Partition label -> share of that partition's audience that sent at least
# 80% of its retweets to that partition. The n-th key corresponds to slot n
# of the per-user counters in aud_dict.
echo_dict = collections.OrderedDict({'farleft': 0,
                                     'ctrleft': 0,
                                     'center': 0,
                                     'ctrright': 0,
                                     'farright': 0})

for n, i in enumerate(echo_dict):
    # All users who retweeted a partition account at least once (that
    # partition's "audience") and sent at least min_rts retweets in total.
    audience = {j: counts
                for j, counts in aud_dict.items()
                if counts[n] >= 1 and sum(counts) >= min_rts}
    # For each audience member, the proportion of their retweets that went
    # to this partition. sum(counts) >= 1 here because counts[n] >= 1.
    audience_80 = {j: counts[n] / sum(counts)
                   for j, counts in audience.items()}
    # Number of audience members who sent at least 80% of their retweets
    # to this partition.
    n_echo = sum(1 for share in audience_80.values() if share >= .8)
    # An empty audience has no defined proportion; record NaN instead of
    # crashing with ZeroDivisionError.
    echo_dict[i] = n_echo / len(audience) if audience else float('nan')

pprint(echo_dict)